package com.evil.mi.crawler;

import cn.hutool.http.Header;
import cn.hutool.http.HttpRequest;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;

import java.text.MessageFormat;
import java.util.List;
import java.util.Random;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.BiFunction;
import java.util.stream.Collectors;


/**
 * com.evil.mi.crawler
 *
 * @author 白雨浓
 * date: 2018/8/18 16:31
 * content: crawls forum thread-list pages and extracts the thread links.
 */
@Component
public class VideoListCrawlerComponent {

    /** Shared generator for the spoofed client address; {@link Random} is safe for concurrent use. */
    private static final Random RANDOM = new Random();

    /** Base URL of the target site, injected from the {@code http} property. */
    @Value("${http}")
    private String HTTP;

    /**
     * Fetches one page of forum 181 and returns the thread links found on it.
     *
     * <p>The page URL is {@code HTTP + "/forum-181-" + current + ".html"}. Every
     * {@code tbody} whose id contains {@code normalthread} contributes the
     * {@code href} of its first anchor; rows without any anchor are skipped.
     *
     * @param current page number inserted into the list URL
     * @return the {@code href} values in document order, possibly empty
     */
    public List<String> getListDetail(int current) {
        String html = this.getListHtml(HTTP + "/forum-181-" + current + ".html");

        Document document = Jsoup.parse(html);
        List<Element> tbodyElements = document.body().getElementsByTag("tbody");

        return tbodyElements.stream()
                // jsoup returns "" (never null) for a missing id, so no null check is needed.
                .filter(element -> element.id().contains("normalthread"))
                // first() yields null instead of throwing when the row has no anchor.
                .map(element -> element.select("a").first())
                .filter(anchor -> anchor != null)
                .map(anchor -> anchor.attr("href"))
                .collect(Collectors.toList());
    }

    /**
     * Issues a GET request for {@code url} with a fixed set of browser-like
     * headers and returns the response body.
     *
     * <p>Each call sends a freshly generated {@code X-Forwarded-For} value of
     * the form {@code 151.a.b.c}.
     *
     * @param url absolute URL to fetch
     * @return the response body as returned by the HTTP client
     */
    public String getListHtml(String url) {
        // Plain concatenation keeps the digits ASCII; MessageFormat would format
        // the numbers according to the JVM's default locale.
        String ip = "151." + randomOctet() + "." + randomOctet() + "." + randomOctet();

        // NOTE(review): the cookie below is a hard-coded captured session; consider
        // moving it to configuration.
        return HttpRequest.get(url)
                .header(Header.USER_AGENT, "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.90 Safari/537.36 2345Explorer/9.3.2.17331")
                .header(Header.COOKIE, "WMwh_2132_saltkey=DeJHUhqZ; WMwh_2132_lastvisit=1534560070; UM_distinctid=1654b28b97241-0da6c600feabd-4947311e-100200-1654b28b97373b; HstCfa2810755=1534564298582; HstCmu2810755=1534564298582; yunsuo_session_verify=41b75f5ed46e14cf80a01498ec96f164; WMwh_2132_sendmail=1; WMwh_2132_st_t=0%7C1534580791%7C71b94cbd2bec1f6a8b651b99502da183; WMwh_2132_forum_lastvisit=D_56_1534567498D_57_1534567575D_283_1534567631D_199_1534567656D_42_1534567731D_181_1534580791; Hm_lvt_acfaccaaa388521ba7e29a5e15cf85ad=1534564285,1534567904,1534575472,1534581401; Hm_lpvt_acfaccaaa388521ba7e29a5e15cf85ad=1534581412; CNZZDATA1254190848=915665222-1534559073-http%253A%252F%252Ft.thzdz.com%252F%7C1534580673; HstCla2810755=1534581412227; HstPn2810755=1; HstPt2810755=76; HstCnv2810755=3; HstCns2810755=5; WMwh_2132_lastact=1534580792%09misc.php%09secqaa;WMwh_2132_secqaa=341155.75feacd118013ba025")
                .header("X-Forwarded-For", ip)
                .header("Cache-Control", "max-age=0")
                .header("Upgrade-Insecure-Requests", "1")
                .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
                .execute().body();
    }

    /** Returns a random value in {@code [0, 253]} for one segment of the spoofed address. */
    private static int randomOctet() {
        return RANDOM.nextInt(254);
    }

}
